In [1]:
import splat
import wisps
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
In [2]:
from wisps import datasets
In [3]:
# Take the 'spex' table from wisps.datasets and rename three of its columns
# to the names that the `features` list uses later in the notebook.
spex = datasets['spex'].rename(
    columns={'l_snr': 'snr1', 'l_t_snr': 'snr2', 'f_x': 'f_test'}
)
# Load the pickled "trash" table; every row in it is given label 0.
trash = pd.read_pickle(wisps.OUTPUT_FILES + '/trash.pkl')
trash['label'] = 0.
In [4]:
# Both tables get the same treatment: column 'f' is renamed to 'f_test'.
man, schn = (
    datasets[key].rename(columns={'f': 'f_test'})
    for key in ('manjavacas', 'schneider')
)
In [5]:
# Selection-function output, read back from the wisps output directory.
sf = pd.read_pickle(wisps.OUTPUT_FILES + '/selection_function.pkl')
In [6]:
# Flatten the selection-function object into a 1-D array of records, build a
# table from those records, rename two columns, and keep only rows whose
# snr1 is above 3.
simulated_spectra = (
    pd.DataFrame.from_records(pd.DataFrame(sf).values.flatten())
    .rename(columns={'f': 'f_test', 'Names': 'name'})
    .loc[lambda frame: frame.snr1 > 3.]
)
In [7]:
# Pass every 'spt' entry of the four tables through wisps.make_spt_number.
# NOTE(review): the column is overwritten in place, so re-running this cell
# feeds already-converted values back into the helper — confirm that is safe.
for frame in (spex, man, schn, trash):
    frame['spt'] = frame['spt'].apply(wisps.make_spt_number)
In [8]:
# Number of rows in the trash table.
trash.shape[0]
Out[8]:
In [9]:
# Use the grism id as the 'name' column for the spex table.
spex['name'] = spex.grism_id
In [10]:
# Columns carried into the training set.
features = [
    # index-ratio columns
    'CH_4/H-Cont',
    'CH_4/H_2O-1',
    'CH_4/H_2O-2',
    'CH_4/J-Cont',
    'H-cont/H_2O-1',
    'H-cont/H_2O-2',
    'H-cont/J-Cont',
    'H_2O-1/J-Cont',
    'H_2O-2/H_2O-1',
    'H_2O-2/J-Cont',
    # remaining per-object columns; 'name' and 'label' are the two that the
    # later `feats` list excludes
    'spt',
    'spex_chi',
    'name',
    'snr2',
    'snr1',
    'line_chi',
    'f_test',
    'label',
]
In [11]:
from scipy import stats
In [12]:
def f_test_comp(x, dfn=2, dfd=1):
    """Evaluate the F-distribution CDF at ``x``.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which the CDF is evaluated. In this notebook it is the
        ratio ``spex_chi / line_chi``.
    dfn : numeric, optional
        Numerator degrees of freedom. The default (2) is the value that was
        previously hard-coded.
    dfd : numeric, optional
        Denominator degrees of freedom. The default (1) is the value that
        was previously hard-coded.

    Returns
    -------
    float or numpy.ndarray
        ``scipy.stats.f.cdf(x, dfn, dfd)`` with ``loc=0`` and ``scale=1``.
    """
    # loc and scale are kept explicit, as in the original call.
    return stats.f.cdf(x, dfn, dfd, loc=0, scale=1)
# For each of these tables, store the chi ratio spex_chi / line_chi in column
# 'x' and overwrite 'f_test' with f_test_comp applied to that ratio.
for frame in (man, schn, simulated_spectra):
    frame['x'] = frame.spex_chi / frame.line_chi
    frame['f_test'] = frame.x.apply(f_test_comp)
# spex gets the same 'f_test' computation, but the ratio is not kept as a column.
spex['f_test'] = (spex.spex_chi / spex.line_chi).apply(f_test_comp)
In [13]:
# Numeric 'spt' for the simulated spectra, derived from their 'spt_new' column.
simulated_spectra['spt'] = simulated_spectra['spt_new'].apply(wisps.make_spt_number)
In [14]:
# Give each simulated spectrum a positional name: 'spect 0', 'spect 1', ...
simulated_spectra['name'] = [
    'spect {}'.format(position) for position in range(len(simulated_spectra))
]
In [15]:
def add_labels(spt, threshold=17.):
    """Return the binary training label for a numeric spectral type.

    Parameters
    ----------
    spt : float
        Numeric spectral type (the notebook obtains it from
        ``wisps.make_spt_number``).
    threshold : float, optional
        Smallest ``spt`` that is labelled 1.0. The default (17.) is the
        cut that was previously hard-coded.

    Returns
    -------
    float
        1.0 when ``spt >= threshold``, otherwise 0.0. NaN compares false
        and therefore yields 0.0.
    """
    return 1.0 if spt >= threshold else 0.0
In [16]:
# Derive the label of each real/simulated table from its numeric 'spt'.
for frame in (spex, man, schn, simulated_spectra):
    frame['label'] = frame.spt.apply(add_labels)
# Trash rows are always label 0, regardless of their 'spt'.
trash['label'] = 0.
In [17]:
#len(trash), len(simulated_spectra)
In [18]:
# Use the grism id as the 'name' column for the trash table.
trash['name'] = trash['grism_id']
In [19]:
#a[features]
In [20]:
# Stack the tables row-wise, keeping only the columns listed in `features`.
# No ignore_index is passed, so each table's own index labels are retained.
# NOTE(review): simulated_spectra is given the same columns in earlier cells
# and is counted in the following length check, yet it is not part of this
# concat — confirm the omission is intentional.
training_set = pd.concat([frame[features] for frame in (trash, spex, man, schn)])
In [21]:
# Row count of the trash table next to the combined row count of the other tables.
len(trash), sum(map(len, (spex, man, schn, simulated_spectra)))
Out[21]:
In [22]:
# Histogram of the label column, to check the balance between the two labels.
training_set['label'].plot(kind='hist')
Out[22]:
In [23]:
# Every feature column except 'name' and 'label'; these are the columns cast to float below.
feats = [column for column in features if column not in ('name', 'label')]
In [24]:
# Run the numeric feature columns through wisps.Annotator.reformat_table
# (project helper; its transformation is not visible in this notebook) and
# cast the result to float.
# astype(float) replaces the element-wise DataFrame.applymap(float), which
# is deprecated since pandas 2.1; astype is available on old and new pandas.
training_set[feats] = wisps.Annotator.reformat_table(training_set[feats]).astype(float)
# Persist the assembled training set in the wisps libraries directory.
training_set.to_pickle(wisps.LIBRARIES + '/training_set.pkl')
In [25]:
# Same label histogram again, now that the feature columns have been cast and the set saved.
training_set['label'].plot(kind='hist')
Out[25]:
In [ ]: